#Keep non-zero count genes only
Illumina_featureCounts <- Illumina_featureCounts[rowSums(Illumina_featureCounts) > 0,]
ONT_featureCounts <- ONT_featureCounts[rowSums(ONT_featureCounts) > 0,]

intersection <- intersect(row.names(Illumina_featureCounts),row.names(ONT_featureCounts))

Illumina_filtered_featureCounts <- Illumina_featureCounts[intersection,]
ONT_filtered_featureCounts <- ONT_featureCounts[intersection,]
ONT_filtered_sample_info <- data.frame(
  condition = gsub("^[^_]+_|\\d+$", "", colnames(ONT_filtered_featureCounts)),
  flowcell = gsub("_.*$", "", colnames(ONT_filtered_featureCounts)),
  mouse=gsub("^[^_]+_", "", colnames(ONT_filtered_featureCounts)),
  row.names = colnames(ONT_filtered_featureCounts)
)

Illumina_filtered_sample_info <- data.frame(
  condition = gsub("^[^_]+_|\\d+$", "", colnames(Illumina_filtered_featureCounts)),
  mouse=gsub("^[^_]+_", "", colnames(Illumina_filtered_featureCounts)),
  row.names = colnames(Illumina_filtered_featureCounts)
)
Illumina_filtered_DESeq.ds <- DESeqDataSetFromMatrix(countData = Illumina_filtered_featureCounts, 
                                   colData = Illumina_filtered_sample_info,
                                   design = ~ condition)
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
ONT_filtered_DESeq.ds <- DESeqDataSetFromMatrix(countData = ONT_filtered_featureCounts, 
                                   colData = ONT_filtered_sample_info,
                                   design = ~ condition)
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors

Test Correlation of counts for the same mouse across platforms

# 'mouse' is the condition column
mouse <- colData(ONT_filtered_DESeq.ds)$mouse

# Convert count matrix to a data frame
count_df <- as.data.frame(counts(ONT_filtered_DESeq.ds))

# Get unique values in 'mouse'
unique_mice <- unique(mouse)

# Initialize an empty data frame to store averaged counts for ONT
ONT_agg_counts <- data.frame(matrix(0, nrow = nrow(count_df), ncol = length(unique_mice)))
colnames(ONT_agg_counts) <- unique_mice

# Iterate over unique values and calculate mean counts
for (m in unique_mice) {
  col_indices <- which(mouse == m)
  ONT_agg_counts[, m] <- as.integer(rowMeans(count_df[, col_indices, drop = FALSE]))
}

ONT_agg_sample_info <- data.frame(
  condition = gsub("^[^_]+_|\\d+$", "", colnames(ONT_agg_counts)),
  mouse=gsub("^[^_]+_", "", colnames(ONT_agg_counts)),
  row.names = colnames(ONT_agg_counts)
)

# Convert back to DESeqDataSet
ONT_agg.ds <- DESeqDataSetFromMatrix(countData = ONT_agg_counts, 
                                   colData = ONT_agg_sample_info,
                                   design = ~ condition)
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
row.names(ONT_agg.ds) <- row.names(ONT_filtered_DESeq.ds)
# Extract gene counts data
counts_ONT <- counts(ONT_agg.ds)
counts_Illumina <- counts(Illumina_filtered_DESeq.ds)

# Create an empty data frame to store results
CompareCounts <- data.frame()

# Create a list to store correlation results
cor_results <- list()

# Iterate over unique mice
for (mouse in unique(ONT_agg.ds$mouse)) {
  
  # Subset counts for the current mouse
  counts_condition_ONT <- counts_ONT[, ONT_agg.ds$mouse == mouse]
  counts_condition_Illumina <- counts_Illumina[, Illumina_filtered_DESeq.ds$mouse == mouse]
  
  # Create a data frame for the current condition
  current_comparison <- data.frame(
    ONT = log2(counts_condition_ONT + 1),  # Add 1 to avoid log(0)
    Illumina = log2(counts_condition_Illumina + 1)
  )
  
  # Perform Spearman correlation test
  spearman_cor <- cor.test(current_comparison$ONT, current_comparison$Illumina, method="spearman")
  cor_results[[mouse]] <- spearman_cor$estimate
  cat("Spearman correlation for", mouse, ":", spearman_cor$estimate, "\n")
  
  plot(
    current_comparison$ONT,
    current_comparison$Illumina,
    cex = 0.1,
    main = paste("ONT vs Illumina for", mouse),
    xlab = "ONT counts",
    ylab = "Illumina counts"
  )
}
## Warning in cor.test.default(current_comparison$ONT,
## current_comparison$Illumina, : Cannot compute exact p-value with ties
## Spearman correlation for F11 : 0.7613154
## Warning in cor.test.default(current_comparison$ONT,
## current_comparison$Illumina, : Cannot compute exact p-value with ties

## Spearman correlation for F15 : 0.7742254
## Warning in cor.test.default(current_comparison$ONT,
## current_comparison$Illumina, : Cannot compute exact p-value with ties

## Spearman correlation for F16 : 0.7905502
## Warning in cor.test.default(current_comparison$ONT,
## current_comparison$Illumina, : Cannot compute exact p-value with ties

## Spearman correlation for F17 : 0.772652
## Warning in cor.test.default(current_comparison$ONT,
## current_comparison$Illumina, : Cannot compute exact p-value with ties

## Spearman correlation for F18 : 0.7396256
## Warning in cor.test.default(current_comparison$ONT,
## current_comparison$Illumina, : Cannot compute exact p-value with ties

## Spearman correlation for F19 : 0.7670523
## Warning in cor.test.default(current_comparison$ONT,
## current_comparison$Illumina, : Cannot compute exact p-value with ties

## Spearman correlation for F20 : 0.7588107
## Warning in cor.test.default(current_comparison$ONT,
## current_comparison$Illumina, : Cannot compute exact p-value with ties

## Spearman correlation for F9 : 0.7781917
## Warning in cor.test.default(current_comparison$ONT,
## current_comparison$Illumina, : Cannot compute exact p-value with ties

## Spearman correlation for GC11 : 0.7569103
## Warning in cor.test.default(current_comparison$ONT,
## current_comparison$Illumina, : Cannot compute exact p-value with ties

## Spearman correlation for GC15 : 0.7565177
## Warning in cor.test.default(current_comparison$ONT,
## current_comparison$Illumina, : Cannot compute exact p-value with ties

## Spearman correlation for GC16 : 0.7548113
## Warning in cor.test.default(current_comparison$ONT,
## current_comparison$Illumina, : Cannot compute exact p-value with ties

## Spearman correlation for GC9 : 0.7637662
## Warning in cor.test.default(current_comparison$ONT,
## current_comparison$Illumina, : Cannot compute exact p-value with ties

## Spearman correlation for GC17 : 0.7694372
## Warning in cor.test.default(current_comparison$ONT,
## current_comparison$Illumina, : Cannot compute exact p-value with ties

## Spearman correlation for GC18 : 0.7666517
## Warning in cor.test.default(current_comparison$ONT,
## current_comparison$Illumina, : Cannot compute exact p-value with ties

## Spearman correlation for GC19 : 0.761132
## Warning in cor.test.default(current_comparison$ONT,
## current_comparison$Illumina, : Cannot compute exact p-value with ties

## Spearman correlation for GC20 : 0.7596869

Pick random mice to compare against each other for spearman r value

# Extract gene counts data
counts_ONT <- counts(ONT_agg.ds)
counts_Illumina <- counts(Illumina_filtered_DESeq.ds)

# Create an empty data frame to store results
CompareCounts <- data.frame()

# Create a list to store correlation results
cor_results <- list()

# Set seed for reproducibility
set.seed(123)

# Number of random comparisons
num_comparisons <- 5  # Adjust as needed

# Randomly select mice for comparison
selected_mice <- sample(unique(ONT_agg.ds$mouse), num_comparisons)

# Iterate over selected mice
for (ONT_mouse in selected_mice) {
  
  # Subset counts for the current condition
  counts_mouse_ONT <- counts_ONT[, ONT_agg.ds$mouse == ONT_mouse]
  
  # Randomly select a mouse from Illumina dataset
  random_mouse_Illumina <- sample(unique(Illumina_filtered_DESeq.ds$mouse), 1)
  counts_mouse_Illumina <- counts_Illumina[, Illumina_filtered_DESeq.ds$mouse == random_mouse_Illumina]
  
  # Create a data frame for the current comparison
  current_comparison <- data.frame(
    ONT = log2(counts_mouse_ONT + 1),  # Add 1 to avoid log(0)
    Illumina = log2(counts_mouse_Illumina + 1)
  )
  
  # Perform Spearman correlation test
  spearman_cor <- cor.test(current_comparison$ONT, current_comparison$Illumina, method="spearman")
  cor_results[[paste(ONT_mouse, "vs", random_mouse_Illumina)]] <- spearman_cor$estimate
  cat("Spearman correlation for", ONT_mouse, "vs", random_mouse_Illumina, ":", spearman_cor$estimate, "\n")
  plot(
    current_comparison$ONT,
    current_comparison$Illumina,
    cex = 0.1,
    main = paste("ONT vs Illumina for", ONT_mouse, "vs", random_mouse_Illumina),
    xlab = "ONT counts",
    ylab = "Illumina counts"
  )
}
## Warning in cor.test.default(current_comparison$ONT,
## current_comparison$Illumina, : Cannot compute exact p-value with ties
## Spearman correlation for GC19 vs F15 : 0.7575328
## Warning in cor.test.default(current_comparison$ONT,
## current_comparison$Illumina, : Cannot compute exact p-value with ties

## Spearman correlation for GC20 vs F19 : 0.75714
## Warning in cor.test.default(current_comparison$ONT,
## current_comparison$Illumina, : Cannot compute exact p-value with ties

## Spearman correlation for F16 vs GC16 : 0.7888914
## Warning in cor.test.default(current_comparison$ONT,
## current_comparison$Illumina, : Cannot compute exact p-value with ties

## Spearman correlation for GC18 vs F18 : 0.7694435
## Warning in cor.test.default(current_comparison$ONT,
## current_comparison$Illumina, : Cannot compute exact p-value with ties

## Spearman correlation for GC15 vs F17 : 0.7527017

Because the correlation r value is not a reliable metric, I decided to use only the differentially expressed genes between conditions to make this assessment.

Perform DESeq

ONT_agg.ds %<>% DESeq()
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 34 genes
## -- DESeq argument 'minReplicatesForReplace' = 7 
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
# normalize for diffs in sequencing depth and abundance per sample
ONT_agg.ds %<>% estimateSizeFactors()
# gene-wise dispersion estimates across all samples
ONT_agg.ds %<>% estimateDispersions()
## found already estimated dispersions, replacing these
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
# fit a neg. binomial GLM and compute Wald stat for each gene 
ONT_agg.ds %<>% nbinomWaldTest()

ONT_agg.ds
## class: DESeqDataSet 
## dim: 24276 16 
## metadata(1): version
## assays(6): counts mu ... replaceCounts replaceCooks
## rownames(24276): ENSMUSG00000000001 ENSMUSG00000000028 ...
##   ENSMUSG00002076809 ENSMUSG00002076896
## rowData names(22): baseMean baseVar ... deviance maxCooks
## colnames(16): F11 F15 ... GC19 GC20
## colData names(4): condition mouse sizeFactor replaceable
Illumina_filtered_DESeq.ds %<>% DESeq()
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 247 genes
## -- DESeq argument 'minReplicatesForReplace' = 7 
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
# normalize for diffs in sequencing depth and abundance per sample
Illumina_filtered_DESeq.ds %<>% estimateSizeFactors()
# gene-wise dispersion estimates across all samples
Illumina_filtered_DESeq.ds %<>% estimateDispersions()
## found already estimated dispersions, replacing these
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
# fit a neg. binomial GLM and compute Wald stat for each gene 
Illumina_filtered_DESeq.ds %<>% nbinomWaldTest()

Illumina_filtered_DESeq.ds
## class: DESeqDataSet 
## dim: 24276 16 
## metadata(1): version
## assays(6): counts mu ... replaceCounts replaceCooks
## rownames(24276): ENSMUSG00000000001 ENSMUSG00000000028 ...
##   ENSMUSG00002076809 ENSMUSG00002076896
## rowData names(22): baseMean baseVar ... deviance maxCooks
## colnames(16): F11 F15 ... GC20 GC9
## colData names(4): condition mouse sizeFactor replaceable
ONT_agg.results <- results(ONT_agg.ds, independentFiltering = TRUE, alpha = 0.05)
# the first line will tell you which comparison was done to achieve the log2FC 
head(ONT_agg.results)
## log2 fold change (MLE): condition GC vs F 
## Wald test p-value: condition GC vs F 
## DataFrame with 6 rows and 6 columns
##                     baseMean log2FoldChange     lfcSE       stat    pvalue
##                    <numeric>      <numeric> <numeric>  <numeric> <numeric>
## ENSMUSG00000000001  4.540212     -0.6380618  0.403548 -1.5811288 0.1138486
## ENSMUSG00000000028  0.720563     -0.7743265  1.061690 -0.7293343 0.4657972
## ENSMUSG00000000031  0.000000             NA        NA         NA        NA
## ENSMUSG00000000037  0.145672     -0.1387489  2.633382 -0.0526885 0.9579801
## ENSMUSG00000000049  1.032558     -1.5466285  0.934413 -1.6551874 0.0978865
## ENSMUSG00000000056 11.450942     -0.0867128  0.250641 -0.3459647 0.7293692
##                         padj
##                    <numeric>
## ENSMUSG00000000001        NA
## ENSMUSG00000000028        NA
## ENSMUSG00000000031        NA
## ENSMUSG00000000037        NA
## ENSMUSG00000000049        NA
## ENSMUSG00000000056  0.999669
Illumina_filtered_DESeq.results <- results(Illumina_filtered_DESeq.ds, independentFiltering = TRUE, alpha = 0.05)
# the first line will tell you which comparison was done to achieve the log2FC 
head(Illumina_filtered_DESeq.results)
## log2 fold change (MLE): condition GC vs F 
## Wald test p-value: condition GC vs F 
## DataFrame with 6 rows and 6 columns
##                     baseMean log2FoldChange     lfcSE      stat    pvalue
##                    <numeric>      <numeric> <numeric> <numeric> <numeric>
## ENSMUSG00000000001 249.28453     -0.0107038 0.0688444 -0.155478  0.876445
## ENSMUSG00000000028  26.95403     -0.1208694 0.2289548 -0.527918  0.597556
## ENSMUSG00000000031   7.58665      0.7204768 0.5462932  1.318846  0.187220
## ENSMUSG00000000037  20.71287     -0.0747751 0.2786427 -0.268355  0.788426
## ENSMUSG00000000049   1.70330      1.8438428 0.9007218  2.047073  0.040651
## ENSMUSG00000000056 600.68133     -0.0805442 0.0513863 -1.567426  0.117015
##                         padj
##                    <numeric>
## ENSMUSG00000000001  0.964969
## ENSMUSG00000000028        NA
## ENSMUSG00000000031        NA
## ENSMUSG00000000037        NA
## ENSMUSG00000000049        NA
## ENSMUSG00000000056  0.464872
summary(ONT_agg.results)
## 
## out of 16768 with nonzero total read count
## adjusted p-value < 0.05
## LFC > 0 (up)       : 20, 0.12%
## LFC < 0 (down)     : 10, 0.06%
## outliers [1]       : 34, 0.2%
## low counts [2]     : 11408, 68%
## (mean count < 7)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
summary(Illumina_filtered_DESeq.results)
## 
## out of 24276 with nonzero total read count
## adjusted p-value < 0.05
## LFC > 0 (up)       : 313, 1.3%
## LFC < 0 (down)     : 251, 1%
## outliers [1]       : 247, 1%
## low counts [2]     : 11580, 48%
## (mean count < 44)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results

Sort by log2FoldChange

ONT_agg.results.sorted <- ONT_agg.results %>% `[`(order(.$log2FoldChange),)

Illumina_filtered_DESeq.results.sorted <- Illumina_filtered_DESeq.results %>% `[`(order(.$log2FoldChange),)

intersect <- list(ONT = row.names(ONT_agg.results.sorted)[1:500], Illumina = row.names(Illumina_filtered_DESeq.results.sorted)[1:500])

# 2D Venn diagram
ggVennDiagram(intersect)

intersect <- list(ONT = row.names(ONT_agg.results.sorted)[23776:24276], Illumina = row.names(Illumina_filtered_DESeq.results.sorted)[23776:24276])

# 2D Venn diagram
ggVennDiagram(intersect)

Sort by padj

ONT_agg.results.sorted <- ONT_agg.results %>% `[`(order(.$padj),)
head(ONT_agg.results.sorted)
## log2 fold change (MLE): condition GC vs F 
## Wald test p-value: condition GC vs F 
## DataFrame with 6 rows and 6 columns
##                     baseMean log2FoldChange     lfcSE      stat      pvalue
##                    <numeric>      <numeric> <numeric> <numeric>   <numeric>
## ENSMUSG00000056055 1701.0914       0.787510 0.0770155  10.22535 1.52676e-24
## ENSMUSG00000018102  147.0009      -0.573914 0.0738193  -7.77459 7.56935e-15
## ENSMUSG00000025496   88.4796      -0.776998 0.1102347  -7.04858 1.80753e-12
## ENSMUSG00000017344  290.5969       0.339869 0.0529559   6.41796 1.38111e-10
## ENSMUSG00000023979  571.9141       0.462316 0.0730309   6.33041 2.44506e-10
## ENSMUSG00000045193  127.8507       0.484243 0.0771691   6.27509 3.49435e-10
##                           padj
##                      <numeric>
## ENSMUSG00000056055 8.13154e-21
## ENSMUSG00000018102 2.01572e-11
## ENSMUSG00000025496 3.20896e-09
## ENSMUSG00000017344 1.83895e-07
## ENSMUSG00000023979 2.60448e-07
## ENSMUSG00000045193 3.10182e-07
Illumina_filtered_DESeq.results.sorted <- Illumina_filtered_DESeq.results %>% `[`(order(.$padj),)
head(Illumina_filtered_DESeq.results.sorted)
## log2 fold change (MLE): condition GC vs F 
## Wald test p-value: condition GC vs F 
## DataFrame with 6 rows and 6 columns
##                     baseMean log2FoldChange     lfcSE      stat      pvalue
##                    <numeric>      <numeric> <numeric> <numeric>   <numeric>
## ENSMUSG00000025496  2452.706      -0.833180 0.0463264  -17.9850 2.55338e-72
## ENSMUSG00000025498   214.126      -1.480849 0.1037658  -14.2711 3.31458e-46
## ENSMUSG00000056055 31957.333       0.646452 0.0536176   12.0567 1.78794e-33
## ENSMUSG00000024227  3261.747       0.835473 0.0743905   11.2309 2.87512e-29
## ENSMUSG00000024900  2150.875       0.485247 0.0443648   10.9376 7.61519e-28
## ENSMUSG00000021270 20519.887      -0.408463 0.0377699  -10.8145 2.93861e-27
##                           padj
##                      <numeric>
## ENSMUSG00000025496 3.17871e-68
## ENSMUSG00000025498 2.06316e-42
## ENSMUSG00000056055 7.41934e-30
## ENSMUSG00000024227 8.94810e-26
## ENSMUSG00000024900 1.89603e-24
## ENSMUSG00000021270 6.09713e-24
intersect <- list(ONT = row.names(ONT_agg.results.sorted)[1:500], Illumina = row.names(Illumina_filtered_DESeq.results.sorted)[1:500])

# 2D Venn diagram
ggVennDiagram(intersect)

intersect <- list(ONT = row.names(ONT_agg.results.sorted)[23776:24276], Illumina = row.names(Illumina_filtered_DESeq.results.sorted)[23776:24276])

# 2D Venn diagram
ggVennDiagram(intersect)

Sort by stat

ONT_agg.results.sorted <- ONT_agg.results %>% `[`(order(-.$stat),)
head(ONT_agg.results.sorted)
## log2 fold change (MLE): condition GC vs F 
## Wald test p-value: condition GC vs F 
## DataFrame with 6 rows and 6 columns
##                     baseMean log2FoldChange     lfcSE      stat      pvalue
##                    <numeric>      <numeric> <numeric> <numeric>   <numeric>
## ENSMUSG00000056055 1701.0914       0.787510 0.0770155  10.22535 1.52676e-24
## ENSMUSG00000017344  290.5969       0.339869 0.0529559   6.41796 1.38111e-10
## ENSMUSG00000023979  571.9141       0.462316 0.0730309   6.33041 2.44506e-10
## ENSMUSG00000045193  127.8507       0.484243 0.0771691   6.27509 3.49435e-10
## ENSMUSG00000024227   39.0702       0.888378 0.1479006   6.00659 1.89463e-09
## ENSMUSG00000029276   39.1079       0.744895 0.1328417   5.60739 2.05404e-08
##                           padj
##                      <numeric>
## ENSMUSG00000056055 8.13154e-21
## ENSMUSG00000017344 1.83895e-07
## ENSMUSG00000023979 2.60448e-07
## ENSMUSG00000045193 3.10182e-07
## ENSMUSG00000024227 1.44154e-06
## ENSMUSG00000029276 1.36748e-05
Illumina_filtered_DESeq.results.sorted <- Illumina_filtered_DESeq.results %>% `[`(order(-.$stat),)
head(Illumina_filtered_DESeq.results.sorted)
## log2 fold change (MLE): condition GC vs F 
## Wald test p-value: condition GC vs F 
## DataFrame with 6 rows and 6 columns
##                     baseMean log2FoldChange     lfcSE      stat      pvalue
##                    <numeric>      <numeric> <numeric> <numeric>   <numeric>
## ENSMUSG00000056055 31957.333       0.646452 0.0536176  12.05671 1.78794e-33
## ENSMUSG00000024227  3261.747       0.835473 0.0743905  11.23091 2.87512e-29
## ENSMUSG00000024900  2150.875       0.485247 0.0443648  10.93765 7.61519e-28
## ENSMUSG00000029276   674.380       0.709556 0.0659643  10.75666 5.51296e-27
## ENSMUSG00000028630  1713.526       0.416220 0.0487700   8.53434 1.40962e-17
## ENSMUSG00000037428   571.629       0.839454 0.1001182   8.38462 5.08879e-17
##                           padj
##                      <numeric>
## ENSMUSG00000056055 7.41934e-30
## ENSMUSG00000024227 8.94810e-26
## ENSMUSG00000024900 1.89603e-24
## ENSMUSG00000029276 9.80440e-24
## ENSMUSG00000028630 1.94982e-14
## ENSMUSG00000037428 5.27919e-14
intersect <- list(ONT = row.names(ONT_agg.results.sorted)[1:500], Illumina = row.names(Illumina_filtered_DESeq.results.sorted)[1:500])

# 2D Venn diagram
ggVennDiagram(intersect)

Sort by pvalue

ONT_agg.results.sorted <- ONT_agg.results %>% `[`(order(.$pvalue),)
head(ONT_agg.results.sorted)
## log2 fold change (MLE): condition GC vs F 
## Wald test p-value: condition GC vs F 
## DataFrame with 6 rows and 6 columns
##                     baseMean log2FoldChange     lfcSE      stat      pvalue
##                    <numeric>      <numeric> <numeric> <numeric>   <numeric>
## ENSMUSG00000056055 1701.0914       0.787510 0.0770155  10.22535 1.52676e-24
## ENSMUSG00000018102  147.0009      -0.573914 0.0738193  -7.77459 7.56935e-15
## ENSMUSG00000025496   88.4796      -0.776998 0.1102347  -7.04858 1.80753e-12
## ENSMUSG00000017344  290.5969       0.339869 0.0529559   6.41796 1.38111e-10
## ENSMUSG00000023979  571.9141       0.462316 0.0730309   6.33041 2.44506e-10
## ENSMUSG00000045193  127.8507       0.484243 0.0771691   6.27509 3.49435e-10
##                           padj
##                      <numeric>
## ENSMUSG00000056055 8.13154e-21
## ENSMUSG00000018102 2.01572e-11
## ENSMUSG00000025496 3.20896e-09
## ENSMUSG00000017344 1.83895e-07
## ENSMUSG00000023979 2.60448e-07
## ENSMUSG00000045193 3.10182e-07
Illumina_filtered_DESeq.results.sorted <- Illumina_filtered_DESeq.results %>% `[`(order(.$pvalue),)
head(Illumina_filtered_DESeq.results.sorted)
## log2 fold change (MLE): condition GC vs F 
## Wald test p-value: condition GC vs F 
## DataFrame with 6 rows and 6 columns
##                     baseMean log2FoldChange     lfcSE      stat      pvalue
##                    <numeric>      <numeric> <numeric> <numeric>   <numeric>
## ENSMUSG00000025496  2452.706      -0.833180 0.0463264  -17.9850 2.55338e-72
## ENSMUSG00000025498   214.126      -1.480849 0.1037658  -14.2711 3.31458e-46
## ENSMUSG00000056055 31957.333       0.646452 0.0536176   12.0567 1.78794e-33
## ENSMUSG00000024227  3261.747       0.835473 0.0743905   11.2309 2.87512e-29
## ENSMUSG00000024900  2150.875       0.485247 0.0443648   10.9376 7.61519e-28
## ENSMUSG00000021270 20519.887      -0.408463 0.0377699  -10.8145 2.93861e-27
##                           padj
##                      <numeric>
## ENSMUSG00000025496 3.17871e-68
## ENSMUSG00000025498 2.06316e-42
## ENSMUSG00000056055 7.41934e-30
## ENSMUSG00000024227 8.94810e-26
## ENSMUSG00000024900 1.89603e-24
## ENSMUSG00000021270 6.09713e-24
intersect <- list(ONT = row.names(ONT_agg.results.sorted)[1:500], Illumina = row.names(Illumina_filtered_DESeq.results.sorted)[1:500])

# 2D Venn diagram
ggVennDiagram(intersect)

intersect <- list(ONT = row.names(ONT_agg.results.sorted)[23776:24276], Illumina = row.names(Illumina_filtered_DESeq.results.sorted)[23776:24276])

# 2D Venn diagram
ggVennDiagram(intersect)

Sort by baseMean value

ONT_agg.results.sorted <- ONT_agg.results %>% `[`(order(.$baseMean),)
head(ONT_agg.results.sorted)
## log2 fold change (MLE): condition GC vs F 
## Wald test p-value: condition GC vs F 
## DataFrame with 6 rows and 6 columns
##                     baseMean log2FoldChange     lfcSE      stat    pvalue
##                    <numeric>      <numeric> <numeric> <numeric> <numeric>
## ENSMUSG00000000031         0             NA        NA        NA        NA
## ENSMUSG00000000125         0             NA        NA        NA        NA
## ENSMUSG00000000149         0             NA        NA        NA        NA
## ENSMUSG00000000154         0             NA        NA        NA        NA
## ENSMUSG00000000182         0             NA        NA        NA        NA
## ENSMUSG00000000215         0             NA        NA        NA        NA
##                         padj
##                    <numeric>
## ENSMUSG00000000031        NA
## ENSMUSG00000000125        NA
## ENSMUSG00000000149        NA
## ENSMUSG00000000154        NA
## ENSMUSG00000000182        NA
## ENSMUSG00000000215        NA
Illumina_filtered_DESeq.results.sorted <- Illumina_filtered_DESeq.results %>% `[`(order(.$baseMean),)
head(Illumina_filtered_DESeq.results.sorted)
## log2 fold change (MLE): condition GC vs F 
## Wald test p-value: condition GC vs F 
## DataFrame with 6 rows and 6 columns
##                     baseMean log2FoldChange     lfcSE      stat    pvalue
##                    <numeric>      <numeric> <numeric> <numeric> <numeric>
## ENSMUSG00000057696 0.0518878       0.306369   3.06038  0.100108  0.920259
## ENSMUSG00000081883 0.0518878       0.306369   3.06038  0.100108  0.920259
## ENSMUSG00000082626 0.0518878       0.306369   3.06038  0.100108  0.920259
## ENSMUSG00000091223 0.0518878       0.306369   3.06038  0.100108  0.920259
## ENSMUSG00000094125 0.0518878       0.306369   3.06038  0.100108  0.920259
## ENSMUSG00000094336 0.0518878       0.306369   3.06038  0.100108  0.920259
##                         padj
##                    <numeric>
## ENSMUSG00000057696        NA
## ENSMUSG00000081883        NA
## ENSMUSG00000082626        NA
## ENSMUSG00000091223        NA
## ENSMUSG00000094125        NA
## ENSMUSG00000094336        NA
intersect <- list(ONT = row.names(ONT_agg.results.sorted)[1:500], Illumina = row.names(Illumina_filtered_DESeq.results.sorted)[1:500])

# 2D Venn diagram
ggVennDiagram(intersect)

intersect <- list(ONT = row.names(ONT_agg.results.sorted)[23776:24276], Illumina = row.names(Illumina_filtered_DESeq.results.sorted)[23776:24276])

# 2D Venn diagram
ggVennDiagram(intersect)

Same comparisons but now without merging ONT replicates

head(ONT_filtered_DESeq.ds)
## class: DESeqDataSet 
## dim: 6 48 
## metadata(1): version
## assays(1): counts
## rownames(6): ENSMUSG00000000001 ENSMUSG00000000028 ...
##   ENSMUSG00000000049 ENSMUSG00000000056
## rowData names(0):
## colnames(48): FC1_F11 FC1_F15 ... FC4_GC19 FC4_GC20
## colData names(3): condition flowcell mouse
ONT_FC1_filtered_DESeq.ds <- ONT_filtered_DESeq.ds[,ONT_filtered_DESeq.ds$flowcell=="FC1"]
# Extract gene counts data
counts_ONT <- counts(ONT_FC1_filtered_DESeq.ds)
counts_Illumina <- counts(Illumina_filtered_DESeq.ds)

# Create an empty data frame to store results
CompareCounts <- data.frame()

# Create a list to store correlation results
cor_results <- list()


# Iterate over unique mice
for (mouse in unique(ONT_FC1_filtered_DESeq.ds$mouse)) {
  
  # Subset counts for the current mouse
  counts_mouse_ONT <- counts_ONT[, ONT_FC1_filtered_DESeq.ds$mouse == mouse]
  counts_mouse_Illumina <- counts_Illumina[, Illumina_filtered_DESeq.ds$mouse == mouse]
  
  # Create a data frame for the current condition
  current_comparison <- data.frame(
    ONT = log2(counts_mouse_ONT + 1),  # Add 1 to avoid log(0)
    Illumina = log2(counts_mouse_Illumina + 1)
  )
  
  # Perform Spearman correlation test
  spearman_cor <- cor.test(current_comparison$ONT, current_comparison$Illumina, method="spearman")
  cor_results[[mouse]] <- spearman_cor$estimate
  cat("Spearman correlation for", mouse, ":", spearman_cor$estimate, "\n")
  
  plot(
    current_comparison$ONT,
    current_comparison$Illumina,
    cex = 0.1,
    main = paste("ONT vs Illumina for", mouse),
    xlab = "ONT counts",
    ylab = "Illumina counts"
  )
}
## Warning in cor.test.default(current_comparison$ONT,
## current_comparison$Illumina, : Cannot compute exact p-value with ties
## Spearman correlation for F11 : 0.7089327
## Warning in cor.test.default(current_comparison$ONT,
## current_comparison$Illumina, : Cannot compute exact p-value with ties

## Spearman correlation for F15 : 0.6807292
## Warning in cor.test.default(current_comparison$ONT,
## current_comparison$Illumina, : Cannot compute exact p-value with ties

## Spearman correlation for F16 : 0.7007497
## Warning in cor.test.default(current_comparison$ONT,
## current_comparison$Illumina, : Cannot compute exact p-value with ties

## Spearman correlation for F17 : 0.717357
## Warning in cor.test.default(current_comparison$ONT,
## current_comparison$Illumina, : Cannot compute exact p-value with ties

## Spearman correlation for F18 : 0.6614535
## Warning in cor.test.default(current_comparison$ONT,
## current_comparison$Illumina, : Cannot compute exact p-value with ties

## Spearman correlation for F19 : 0.7224934
## Warning in cor.test.default(current_comparison$ONT,
## current_comparison$Illumina, : Cannot compute exact p-value with ties

## Spearman correlation for F20 : 0.7066639
## Warning in cor.test.default(current_comparison$ONT,
## current_comparison$Illumina, : Cannot compute exact p-value with ties

## Spearman correlation for F9 : 0.699154
## Warning in cor.test.default(current_comparison$ONT,
## current_comparison$Illumina, : Cannot compute exact p-value with ties

## Spearman correlation for GC11 : 0.7022545
## Warning in cor.test.default(current_comparison$ONT,
## current_comparison$Illumina, : Cannot compute exact p-value with ties

## Spearman correlation for GC15 : 0.7140657
## Warning in cor.test.default(current_comparison$ONT,
## current_comparison$Illumina, : Cannot compute exact p-value with ties

## Spearman correlation for GC16 : 0.7155123
## Warning in cor.test.default(current_comparison$ONT,
## current_comparison$Illumina, : Cannot compute exact p-value with ties

## Spearman correlation for GC9 : 0.731864

Perform DESeq

ONT_filtered_DESeq.ds %<>% DESeq()
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
##    function: y = a/x + b, and a local regression fit was automatically substituted.
##    specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 31 genes
## -- DESeq argument 'minReplicatesForReplace' = 7 
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
# normalize for diffs in sequencing depth and abundance per sample
ONT_filtered_DESeq.ds %<>% estimateSizeFactors()
# gene-wise dispersion estimates across all samples
ONT_filtered_DESeq.ds %<>% estimateDispersions()
## found already estimated dispersions, replacing these
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
##    function: y = a/x + b, and a local regression fit was automatically substituted.
##    specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
# fit a neg. binomial GLM and compute Wald stat for each gene 
ONT_filtered_DESeq.ds %<>% nbinomWaldTest()

ONT_filtered_DESeq.ds
## class: DESeqDataSet 
## dim: 24276 48 
## metadata(1): version
## assays(6): counts mu ... replaceCounts replaceCooks
## rownames(24276): ENSMUSG00000000001 ENSMUSG00000000028 ...
##   ENSMUSG00002076809 ENSMUSG00002076896
## rowData names(22): baseMean baseVar ... deviance maxCooks
## colnames(48): FC1_F11 FC1_F15 ... FC4_GC19 FC4_GC20
## colData names(5): condition flowcell mouse sizeFactor replaceable
ONT_filtered_DESeq.results <- results(ONT_filtered_DESeq.ds, independentFiltering = TRUE, alpha = 0.05)
# the first line will tell you which comparison was done to achieve the log2FC 
head(ONT_filtered_DESeq.results)
## log2 fold change (MLE): condition GC vs F 
## Wald test p-value: condition GC vs F 
## DataFrame with 6 rows and 6 columns
##                     baseMean log2FoldChange     lfcSE       stat    pvalue
##                    <numeric>      <numeric> <numeric>  <numeric> <numeric>
## ENSMUSG00000000001  4.774709     -0.4765761  0.243622 -1.9562141 0.0504399
## ENSMUSG00000000028  0.889156     -0.1640239  0.474116 -0.3459573 0.7293748
## ENSMUSG00000000031  0.031658      0.1533898  2.944596  0.0520920 0.9584554
## ENSMUSG00000000037  0.180497      0.1589373  2.020046  0.0786801 0.9372871
## ENSMUSG00000000049  1.209067     -1.1214709  0.463943 -2.4172610 0.0156378
## ENSMUSG00000000056 11.066338     -0.0905604  0.132002 -0.6860529 0.4926798
##                         padj
##                    <numeric>
## ENSMUSG00000000001  0.430308
## ENSMUSG00000000028        NA
## ENSMUSG00000000031        NA
## ENSMUSG00000000037        NA
## ENSMUSG00000000049        NA
## ENSMUSG00000000056  0.878492
summary(ONT_filtered_DESeq.results)
## 
## out of 24276 with nonzero total read count
## adjusted p-value < 0.05
## LFC > 0 (up)       : 86, 0.35%
## LFC < 0 (down)     : 67, 0.28%
## outliers [1]       : 31, 0.13%
## low counts [2]     : 17386, 72%
## (mean count < 5)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results

Sort by log2FoldChange

ONT_filtered_DESeq.results.sorted <- ONT_filtered_DESeq.results %>% `[`(order(.$log2FoldChange),)

Illumina_filtered_DESeq.results.sorted <- Illumina_filtered_DESeq.results %>% `[`(order(.$log2FoldChange),)

intersect <- list(ONT = row.names(ONT_filtered_DESeq.results.sorted)[1:500], Illumina = row.names(Illumina_filtered_DESeq.results.sorted)[1:500])

# 2D Venn diagram
ggVennDiagram(intersect)

intersect <- list(ONT = row.names(ONT_filtered_DESeq.results.sorted)[23776:24276], Illumina = row.names(Illumina_filtered_DESeq.results.sorted)[23776:24276])

# 2D Venn diagram
ggVennDiagram(intersect)

Sort by padj

ONT_filtered_DESeq.results.sorted <- ONT_filtered_DESeq.results %>% `[`(order(.$padj),)

Illumina_filtered_DESeq.results.sorted <- Illumina_filtered_DESeq.results %>% `[`(order(.$padj),)

intersect <- list(ONT = row.names(ONT_filtered_DESeq.results.sorted)[1:500], Illumina = row.names(Illumina_filtered_DESeq.results.sorted)[1:500])

# 2D Venn diagram
ggVennDiagram(intersect)

intersect <- list(ONT = row.names(ONT_filtered_DESeq.results.sorted)[23776:24276], Illumina = row.names(Illumina_filtered_DESeq.results.sorted)[23776:24276])

# 2D Venn diagram
ggVennDiagram(intersect)

Sort by pvalue

ONT_filtered_DESeq.results.sorted <- ONT_filtered_DESeq.results %>% `[`(order(.$pvalue),)

Illumina_filtered_DESeq.results.sorted <- Illumina_filtered_DESeq.results %>% `[`(order(.$pvalue),)

intersect <- list(ONT = row.names(ONT_filtered_DESeq.results.sorted)[1:500], Illumina = row.names(Illumina_filtered_DESeq.results.sorted)[1:500])

# 2D Venn diagram
ggVennDiagram(intersect)

intersect <- list(ONT = row.names(ONT_filtered_DESeq.results.sorted)[23776:24276], Illumina = row.names(Illumina_filtered_DESeq.results.sorted)[23776:24276])

# 2D Venn diagram
ggVennDiagram(intersect)

Sort by stat value

ONT_filtered_DESeq.results.sorted <- ONT_filtered_DESeq.results %>% `[`(order(.$stat),)

Illumina_filtered_DESeq.results.sorted <- Illumina_filtered_DESeq.results %>% `[`(order(.$stat),)

intersect <- list(ONT = row.names(ONT_filtered_DESeq.results.sorted)[1:500], Illumina = row.names(Illumina_filtered_DESeq.results.sorted)[1:500])

# 2D Venn diagram
ggVennDiagram(intersect)

intersect <- list(ONT = row.names(ONT_filtered_DESeq.results.sorted)[23776:24276], Illumina = row.names(Illumina_filtered_DESeq.results.sorted)[23776:24276])

# 2D Venn diagram
ggVennDiagram(intersect)

Sort by baseMean value

ONT_filtered_DESeq.results.sorted <- ONT_filtered_DESeq.results %>% `[`(order(.$baseMean),)

Illumina_filtered_DESeq.results.sorted <- Illumina_filtered_DESeq.results %>% `[`(order(.$baseMean),)

intersect <- list(ONT = row.names(ONT_filtered_DESeq.results.sorted)[1:500], Illumina = row.names(Illumina_filtered_DESeq.results.sorted)[1:500])

# 2D Venn diagram
ggVennDiagram(intersect)

intersect <- list(ONT = row.names(ONT_filtered_DESeq.results.sorted)[23776:24276], Illumina = row.names(Illumina_filtered_DESeq.results.sorted)[23776:24276])

# 2D Venn diagram
ggVennDiagram(intersect)

Gene counts Correlation test only between common DEGs/metric with most common genes

Use stat value

ONT_significant_results <-  ONT_filtered_DESeq.results[which(ONT_filtered_DESeq.results$padj<0.05),]
Illumina_significant_results <-  Illumina_filtered_DESeq.results[which(Illumina_filtered_DESeq.results$padj<0.05),]

common.sig.genes <- intersect(row.names(ONT_significant_results),row.names(Illumina_significant_results))

ONT_significant_counts <- counts(ONT_agg.ds[common.sig.genes,])
Illumina_significant_counts <- counts(Illumina_filtered_DESeq.ds[common.sig.genes,])
head(ONT_significant_counts)
##                    F11 F15  F16 F17 F18 F19 F20   F9 GC11 GC15 GC16 GC9 GC17
## ENSMUSG00000000628 240 272  416 191 103 217 196  220  128  154  127 162  181
## ENSMUSG00000002459  14  11   23  13  11  12  13   15   13   14   11  19   23
## ENSMUSG00000004630 292 288  538 301 252 263 271  284  178  178  216 191  232
## ENSMUSG00000004655  19  20   28  14  13  18  13   23   13    8   10  11   15
## ENSMUSG00000007682   3   4    5   1   3   2   2    7    4    5    5  10    9
## ENSMUSG00000015656 876 858 1153 663 596 732 607 1065  648  493  544 555  645
##                    GC18 GC19 GC20
## ENSMUSG00000000628  170  160  179
## ENSMUSG00000002459   20   14   14
## ENSMUSG00000004630  247  243  232
## ENSMUSG00000004655   13    8   10
## ENSMUSG00000007682    7    7    4
## ENSMUSG00000015656  598  585  548
head(Illumina_significant_counts)
##                      F11   F15   F16  F17   F18   F19   F20    F9  GC11  GC15
## ENSMUSG00000000628  9286  8623  7617 6075  9588  9569 10764  6574  6871  8955
## ENSMUSG00000002459   485   372   445  323   485   437   498   587   650   827
## ENSMUSG00000004630  2103  1696  1603 1823  2549  1932  2149  1823  1560  1910
## ENSMUSG00000004655   943   747   683  598   898   786   690   861   671   763
## ENSMUSG00000007682   162   220   138  146   257   241   220   388   397   495
## ENSMUSG00000015656 13735 12828 10994 9257 15795 14704 17839 15048 12448 14289
##                     GC16 GC17 GC18  GC19 GC20  GC9
## ENSMUSG00000000628  7539 6117 6455  7826 7768 5606
## ENSMUSG00000002459   602  562  683   815  621  619
## ENSMUSG00000004630  1941 1645 1756  1890 1727 1291
## ENSMUSG00000004655   566  434  492   696  552  466
## ENSMUSG00000007682   285  325  309   433  289  396
## ENSMUSG00000015656 11547 8713 9961 12990 9974 9092
# Create an empty data frame to store results
CompareCounts <- data.frame()

# Create a list to store correlation results
cor_results <- list()

# Iterate over unique mice
for (mouse in unique(ONT_agg.ds$mouse)) {
  
  # Subset counts for the current mouse
  counts_condition_ONT <- ONT_significant_counts[,mouse]
  counts_condition_Illumina <- Illumina_significant_counts[, mouse]
  
  # Create a data frame for the current condition
  current_comparison <- data.frame(
    ONT = log2(counts_condition_ONT + 1),  # Add 1 to avoid log(0)
    Illumina = log2(counts_condition_Illumina + 1)
  )
  
  # Perform Spearman correlation test
  spearman_cor <- cor.test(current_comparison$ONT, current_comparison$Illumina, method="spearman")
  cor_results[[mouse]] <- spearman_cor$estimate
  cat("Spearman correlation for", mouse, ":", spearman_cor$estimate, "\n")
  
  plot(
    current_comparison$ONT,
    current_comparison$Illumina,
    cex = 0.1,
    main = paste("ONT vs Illumina for", mouse),
    xlab = "ONT counts",
    ylab = "Illumina counts"
  )
}
## Warning in cor.test.default(current_comparison$ONT,
## current_comparison$Illumina, : Cannot compute exact p-value with ties
## Spearman correlation for F11 : 0.7711405
## Warning in cor.test.default(current_comparison$ONT,
## current_comparison$Illumina, : Cannot compute exact p-value with ties

## Spearman correlation for F15 : 0.8018548
## Warning in cor.test.default(current_comparison$ONT,
## current_comparison$Illumina, : Cannot compute exact p-value with ties

## Spearman correlation for F16 : 0.8005616
## Warning in cor.test.default(current_comparison$ONT,
## current_comparison$Illumina, : Cannot compute exact p-value with ties

## Spearman correlation for F17 : 0.7960437
## Warning in cor.test.default(current_comparison$ONT,
## current_comparison$Illumina, : Cannot compute exact p-value with ties

## Spearman correlation for F18 : 0.7721826
## Warning in cor.test.default(current_comparison$ONT,
## current_comparison$Illumina, : Cannot compute exact p-value with ties

## Spearman correlation for F19 : 0.7992069
## Warning in cor.test.default(current_comparison$ONT,
## current_comparison$Illumina, : Cannot compute exact p-value with ties

## Spearman correlation for F20 : 0.7862589
## Warning in cor.test.default(current_comparison$ONT,
## current_comparison$Illumina, : Cannot compute exact p-value with ties

## Spearman correlation for F9 : 0.797229
## Warning in cor.test.default(current_comparison$ONT,
## current_comparison$Illumina, : Cannot compute exact p-value with ties

## Spearman correlation for GC11 : 0.7601281
## Warning in cor.test.default(current_comparison$ONT,
## current_comparison$Illumina, : Cannot compute exact p-value with ties

## Spearman correlation for GC15 : 0.7876607
## Warning in cor.test.default(current_comparison$ONT,
## current_comparison$Illumina, : Cannot compute exact p-value with ties

## Spearman correlation for GC16 : 0.7791152
## Warning in cor.test.default(current_comparison$ONT,
## current_comparison$Illumina, : Cannot compute exact p-value with ties

## Spearman correlation for GC9 : 0.7950686
## Warning in cor.test.default(current_comparison$ONT,
## current_comparison$Illumina, : Cannot compute exact p-value with ties

## Spearman correlation for GC17 : 0.7825785
## Warning in cor.test.default(current_comparison$ONT,
## current_comparison$Illumina, : Cannot compute exact p-value with ties

## Spearman correlation for GC18 : 0.7683062
## Warning in cor.test.default(current_comparison$ONT,
## current_comparison$Illumina, : Cannot compute exact p-value with ties

## Spearman correlation for GC19 : 0.791641
## Warning in cor.test.default(current_comparison$ONT,
## current_comparison$Illumina, : Cannot compute exact p-value with ties

## Spearman correlation for GC20 : 0.7775292

Random mouse comparison

# Set seed for reproducibility
set.seed(123)

# Number of random comparisons
num_comparisons <- 10  # Adjust as needed

# Randomly select mice for comparison
selected_mice <- sample(unique(ONT_agg.ds$mouse), num_comparisons)

# Iterate over selected mice
for (ONT_mouse in selected_mice) {
  
  # Subset counts for the current condition
  counts_mouse_ONT <- ONT_significant_counts[, ONT_mouse]
  
  # Randomly select a mouse from Illumina dataset
  random_mouse_Illumina <- sample(unique(Illumina_filtered_DESeq.ds$mouse), 1)
  counts_mouse_Illumina <- Illumina_significant_counts[, random_mouse_Illumina]
  
  # Create a data frame for the current comparison
  current_comparison <- data.frame(
    ONT = log2(counts_mouse_ONT + 1),  # Add 1 to avoid log(0)
    Illumina = log2(counts_mouse_Illumina + 1)
  )
  
  # Perform Spearman correlation test
  spearman_cor <- cor.test(current_comparison$ONT, current_comparison$Illumina, method="spearman")
  cor_results[[paste(ONT_mouse, "vs", random_mouse_Illumina)]] <- spearman_cor$estimate
  cat("Spearman correlation for", ONT_mouse, "vs", random_mouse_Illumina, ":", spearman_cor$estimate, "\n")
  plot(
    current_comparison$ONT,
    current_comparison$Illumina,
    cex = 0.1,
    main = paste("ONT vs Illumina for", ONT_mouse, "vs", random_mouse_Illumina),
    xlab = "ONT counts",
    ylab = "Illumina counts"
  )
}
## Warning in cor.test.default(current_comparison$ONT,
## current_comparison$Illumina, : Cannot compute exact p-value with ties
## Spearman correlation for GC19 vs F19 : 0.7949426
## Warning in cor.test.default(current_comparison$ONT,
## current_comparison$Illumina, : Cannot compute exact p-value with ties

## Spearman correlation for GC20 vs GC11 : 0.7590548
## Warning in cor.test.default(current_comparison$ONT,
## current_comparison$Illumina, : Cannot compute exact p-value with ties

## Spearman correlation for F16 vs GC15 : 0.769016
## Warning in cor.test.default(current_comparison$ONT,
## current_comparison$Illumina, : Cannot compute exact p-value with ties

## Spearman correlation for GC18 vs GC16 : 0.780063
## Warning in cor.test.default(current_comparison$ONT,
## current_comparison$Illumina, : Cannot compute exact p-value with ties

## Spearman correlation for GC15 vs GC9 : 0.7870606
## Warning in cor.test.default(current_comparison$ONT,
## current_comparison$Illumina, : Cannot compute exact p-value with ties

## Spearman correlation for F15 vs F18 : 0.8103248
## Warning in cor.test.default(current_comparison$ONT,
## current_comparison$Illumina, : Cannot compute exact p-value with ties

## Spearman correlation for F19 vs F16 : 0.7972008
## Warning in cor.test.default(current_comparison$ONT,
## current_comparison$Illumina, : Cannot compute exact p-value with ties

## Spearman correlation for F18 vs GC16 : 0.7379898
## Warning in cor.test.default(current_comparison$ONT,
## current_comparison$Illumina, : Cannot compute exact p-value with ties

## Spearman correlation for F17 vs GC11 : 0.7139959
## Warning in cor.test.default(current_comparison$ONT,
## current_comparison$Illumina, : Cannot compute exact p-value with ties

## Spearman correlation for GC9 vs GC17 : 0.7893117